import subprocess

def extract_video_segment(input_path, output_path):
    """
    Copy a video file to a new location using the system ``cp`` command.

    Despite its name, this function does not trim the video: the whole
    file at ``input_path`` is copied unchanged to ``output_path``.

    Args:
        input_path (str): Path of the source video file.
        output_path (str): Path the copy is written to.

    Returns:
        bool: True if ``cp`` exited with status 0, False otherwise.
    """
    command = [
        'cp',
        '--',  # end of options, so a path starting with "-" is not parsed as a flag
        input_path,
        output_path,
    ]
    # No check=True on purpose: a failed copy is reported through the return
    # value instead of an exception, so callers that ignore the result keep
    # running exactly as before.
    completed = subprocess.run(command)
    return completed.returncode == 0


import json
import os
import random

# Annotation JSON; its "sentences" list holds entries with "video_id" and "caption".
input_file = "/run/determined/NAS1/public/MSRVTT/test_videodatainfo.json"
# Text file that receives one caption per line.
output_txt = "/home/yunzhu/opensora_distill/assets/texts/t2v_msrvtt.txt"
# Directory containing the source videos, named "<video_id>.mp4".
video_dir = "/run/determined/NAS1/public/MSRVTT/videos"
# Directory that receives the copied videos, named "0000.mp4", "0001.mp4", ...
save_dir = "/run/determined/local6/msrttv/"

# JSON is UTF-8 by specification; do not depend on the locale's default encoding.
with open(input_file, "r", encoding="utf-8") as f:
    sentences = json.load(f)["sentences"]
# NOTE: the shuffle is unseeded, so every run selects a different subset.
random.shuffle(sentences)

# The copy step fails for every file if the destination directory is missing.
os.makedirs(save_dir, exist_ok=True)

prompt_list = []
# Take the first 2048 shuffled entries; the enumerate index doubles as the
# output file number, so line N of the prompt file matches video N.
for videoid, info in enumerate(sentences[:2048]):
    video_name = info["video_id"] + ".mp4"
    video_path = os.path.join(video_dir, video_name)
    save_video_name = f"{videoid:04d}.mp4"
    save_path = os.path.join(save_dir, save_video_name)
    extract_video_segment(video_path, save_path)
    prompt_list.append(info["caption"])


with open(output_txt, "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in prompt_list)

